{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.sparse import csr_matrix\n",
    "import sparse_dot_topn.sparse_dot_topn as ct\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('data/Seattle_Hotels_Duplicates.csv', encoding=\"latin-1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Hilton Garden Inn Seattle Downtown</td>\n",
       "      <td>1821 Boren Avenue, Seattle Washington 98101 USA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Sheraton Grand Seattle</td>\n",
       "      <td>1400 6th Avenue, Seattle, Washington 98101 USA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Crowne Plaza Seattle Downtown</td>\n",
       "      <td>1113 6th Ave, Seattle, WA 98101</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 name  \\\n",
       "0  Hilton Garden Inn Seattle Downtown   \n",
       "1              Sheraton Grand Seattle   \n",
       "2       Crowne Plaza Seattle Downtown   \n",
       "\n",
       "                                           address  \n",
       "0  1821 Boren Avenue, Seattle Washington 98101 USA  \n",
       "1   1400 6th Avenue, Seattle, Washington 98101 USA  \n",
       "2                  1113 6th Ave, Seattle, WA 98101  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Roy Street Commons                                 2\n",
       "Comfort Inn & Suites                               2\n",
       "Seattle Inn Northgate                              2\n",
       "Hotel Seattle                                      2\n",
       "Ace Hotel Seattle                                  2\n",
       "                                                  ..\n",
       "Econo Lodge Renton-Bellevue                        1\n",
       "Hotel Theodore                                     1\n",
       "La Quinta Inn & Suites Seattle Downtown            1\n",
       "Thompson Seattle                                   1\n",
       "Travelodge by Wyndham Seattle North of Downtown    1\n",
       "Name: name, Length: 155, dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.name.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>82</td>\n",
       "      <td>Roy Street Commons</td>\n",
       "      <td>621 12th Ave E, Seattle, WA 98102</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>90</td>\n",
       "      <td>Roy Street Commons</td>\n",
       "      <td>621 12th Avenue East, Seattle, Washington 98102</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  name                                          address\n",
       "82  Roy Street Commons                621 12th Ave E, Seattle, WA 98102\n",
       "90  Roy Street Commons  621 12th Avenue East, Seattle, Washington 98102"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[df['name'] == 'Roy Street Commons']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['name_address'] = df['name'] + ' ' + df['address']\n",
    "name_address = df['name_address']\n",
    "vectorizer = TfidfVectorizer(\"char\", ngram_range=(1, 4), sublinear_tf=True)\n",
    "tf_idf_matrix = vectorizer.fit_transform(name_address)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<168x3218 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 6002 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tf_idf_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "def awesome_cossim_top(A, B, ntop, lower_bound=0):\n",
    "  \n",
    "    A = A.tocsr()\n",
    "    B = B.tocsr()\n",
    "    M, _ = A.shape\n",
    "    _, N = B.shape\n",
    " \n",
    "    idx_dtype = np.int32\n",
    " \n",
    "    nnz_max = M*ntop\n",
    " \n",
    "    indptr = np.zeros(M+1, dtype=idx_dtype)\n",
    "    indices = np.zeros(nnz_max, dtype=idx_dtype)\n",
    "    data = np.zeros(nnz_max, dtype=A.dtype)\n",
    "\n",
    "    ct.sparse_dot_topn(\n",
    "        M, N, np.asarray(A.indptr, dtype=idx_dtype),\n",
    "        np.asarray(A.indices, dtype=idx_dtype),\n",
    "        A.data,\n",
    "        np.asarray(B.indptr, dtype=idx_dtype),\n",
    "        np.asarray(B.indices, dtype=idx_dtype),\n",
    "        B.data,\n",
    "        ntop,\n",
    "        lower_bound,\n",
    "        indptr, indices, data)\n",
    "\n",
    "    return csr_matrix((data,indices,indptr),shape=(M,N))\n",
    "\n",
    "matches = awesome_cossim_top(tf_idf_matrix, tf_idf_matrix.transpose(), 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<168x168 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 840 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "matches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_matches_df(sparse_matrix, name_vector, top=840):\n",
    "    non_zeros = sparse_matrix.nonzero()\n",
    "    \n",
    "    sparserows = non_zeros[0]\n",
    "    sparsecols = non_zeros[1]\n",
    "    \n",
    "    if top:\n",
    "        nr_matches = top\n",
    "    else:\n",
    "        nr_matches = sparsecols.size\n",
    "    \n",
    "    left_side = np.empty([nr_matches], dtype=object)\n",
    "    right_side = np.empty([nr_matches], dtype=object)\n",
    "    similairity = np.zeros(nr_matches)\n",
    "    \n",
    "    for index in range(0, nr_matches):\n",
    "        left_side[index] = name_vector[sparserows[index]]\n",
    "        right_side[index] = name_vector[sparsecols[index]]\n",
    "        similairity[index] = sparse_matrix.data[index]\n",
    "    \n",
    "    return pd.DataFrame({'left_side': left_side,\n",
    "                          'right_side': right_side,\n",
    "                           'similarity': similairity})\n",
    "\n",
    "matches_df = get_matches_df(matches, name_address)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>left_side</th>\n",
       "      <th>right_side</th>\n",
       "      <th>similarity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>826</td>\n",
       "      <td>Pike's Place Lux Suites by Barsala 2nd Ave and...</td>\n",
       "      <td>Pike's Place Lux Suites by Barsala 2rd Ave and...</td>\n",
       "      <td>0.715406</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>831</td>\n",
       "      <td>Pike's Place Lux Suites by Barsala 2rd Ave and...</td>\n",
       "      <td>Pike's Place Lux Suites by Barsala 2nd Ave and...</td>\n",
       "      <td>0.715406</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>206</td>\n",
       "      <td>Holiday Inn Express &amp; Suites Seattle-City Cent...</td>\n",
       "      <td>Holiday Inn Express &amp; Suites Seattle City Cent...</td>\n",
       "      <td>0.712321</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>256</td>\n",
       "      <td>Holiday Inn Express &amp; Suites Seattle City Cent...</td>\n",
       "      <td>Holiday Inn Express &amp; Suites Seattle-City Cent...</td>\n",
       "      <td>0.712321</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>181</td>\n",
       "      <td>Travelodge Seattle by The Space Needle 200 6th...</td>\n",
       "      <td>Travelodge Seattle by The Space Needle 200 6th...</td>\n",
       "      <td>0.669974</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>211</td>\n",
       "      <td>Travelodge Seattle by The Space Needle 200 6th...</td>\n",
       "      <td>Travelodge Seattle by The Space Needle 200 6th...</td>\n",
       "      <td>0.669974</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>791</td>\n",
       "      <td>citizenM Seattle South Lake Union hotel 201 We...</td>\n",
       "      <td>citizenM Seattle South Lake Union hotel 201 We...</td>\n",
       "      <td>0.651961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>836</td>\n",
       "      <td>citizenM Seattle South Lake Union hotel 201 We...</td>\n",
       "      <td>citizenM Seattle South Lake Union hotel 201 We...</td>\n",
       "      <td>0.651961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>586</td>\n",
       "      <td>Quality Inn &amp; Suites Seattle Center 618 John S...</td>\n",
       "      <td>Quality Inn &amp; Suites Seattle Center 618 John S...</td>\n",
       "      <td>0.627400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>551</td>\n",
       "      <td>Quality Inn &amp; Suites Seattle Center 618 John S...</td>\n",
       "      <td>Quality Inn &amp; Suites Seattle Center 618 John S...</td>\n",
       "      <td>0.627400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>46</td>\n",
       "      <td>Hilton Garden Inn Seattle Downtown 1821 Boren ...</td>\n",
       "      <td>Hilton Garden Inn Seattle Downtown 1821 Boren ...</td>\n",
       "      <td>0.617412</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Hilton Garden Inn Seattle Downtown 1821 Boren ...</td>\n",
       "      <td>Hilton Garden Inn Seattle Downtown 1821 Boren ...</td>\n",
       "      <td>0.617412</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>781</td>\n",
       "      <td>Hyatt Regency Lake Washington At SeattleS Sout...</td>\n",
       "      <td>Hyatt Regency Lake Washington At SeattleS Sout...</td>\n",
       "      <td>0.614371</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>746</td>\n",
       "      <td>Hyatt Regency Lake Washington At SeattleS Sout...</td>\n",
       "      <td>Hyatt Regency Lake Washington At SeattleS Sout...</td>\n",
       "      <td>0.614371</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>346</td>\n",
       "      <td>Home2 Suites by Hilton Seattle Airport 380 Upl...</td>\n",
       "      <td>Home2 Suites by Hilton Seattle Airport 380 Upl...</td>\n",
       "      <td>0.582787</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>341</td>\n",
       "      <td>Home2 Suites by Hilton Seattle Airport 380 Upl...</td>\n",
       "      <td>Home2 Suites by Hilton Seattle Airport 380 Upl...</td>\n",
       "      <td>0.582787</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>561</td>\n",
       "      <td>Renaissance Seattle Hotel 515 Madison Street, ...</td>\n",
       "      <td>Renaissance Seattle Hotel 515 Madison St, Seat...</td>\n",
       "      <td>0.531174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>546</td>\n",
       "      <td>Renaissance Seattle Hotel 515 Madison St, Seat...</td>\n",
       "      <td>Renaissance Seattle Hotel 515 Madison Street, ...</td>\n",
       "      <td>0.531174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>391</td>\n",
       "      <td>Days Inn by Wyndham Seattle North of Downtown ...</td>\n",
       "      <td>Travelodge by Wyndham Seattle North of Downtow...</td>\n",
       "      <td>0.528202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>396</td>\n",
       "      <td>Travelodge by Wyndham Seattle North of Downtow...</td>\n",
       "      <td>Days Inn by Wyndham Seattle North of Downtown ...</td>\n",
       "      <td>0.528202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>401</td>\n",
       "      <td>Comfort Inn &amp; Suites 13700 Aurora Ave N, Seatt...</td>\n",
       "      <td>Comfort Inn &amp; Suites 13700 Aurora Avenue North...</td>\n",
       "      <td>0.527696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>441</td>\n",
       "      <td>Comfort Inn &amp; Suites 13700 Aurora Avenue North...</td>\n",
       "      <td>Comfort Inn &amp; Suites 13700 Aurora Ave N, Seatt...</td>\n",
       "      <td>0.527696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>451</td>\n",
       "      <td>Roy Street Commons 621 12th Avenue East, Seatt...</td>\n",
       "      <td>Roy Street Commons 621 12th Ave E, Seattle, WA...</td>\n",
       "      <td>0.499497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>411</td>\n",
       "      <td>Roy Street Commons 621 12th Ave E, Seattle, WA...</td>\n",
       "      <td>Roy Street Commons 621 12th Avenue East, Seatt...</td>\n",
       "      <td>0.499497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>Crowne Plaza Seattle Downtown 1113 6th Ave, Se...</td>\n",
       "      <td>Crowne Plaza Seattle 1113 6th Ave, Seattle, Wa...</td>\n",
       "      <td>0.469867</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>61</td>\n",
       "      <td>Crowne Plaza Seattle 1113 6th Ave, Seattle, Wa...</td>\n",
       "      <td>Crowne Plaza Seattle Downtown 1113 6th Ave, Se...</td>\n",
       "      <td>0.469867</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>671</td>\n",
       "      <td>Watertown Hotel - A Staypineapple Hotel 4242 R...</td>\n",
       "      <td>University Inn - A Staypineapple Hotel 4140 Ro...</td>\n",
       "      <td>0.432854</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>706</td>\n",
       "      <td>University Inn - A Staypineapple Hotel 4140 Ro...</td>\n",
       "      <td>Watertown Hotel - A Staypineapple Hotel 4242 R...</td>\n",
       "      <td>0.432854</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>621</td>\n",
       "      <td>Executive Inn By The Space Needle 200 Taylor A...</td>\n",
       "      <td>Travelodge Seattle by The Space Needle 200 6th...</td>\n",
       "      <td>0.428319</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>182</td>\n",
       "      <td>Travelodge Seattle by The Space Needle 200 6th...</td>\n",
       "      <td>Executive Inn By The Space Needle 200 Taylor A...</td>\n",
       "      <td>0.428319</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             left_side  \\\n",
       "826  Pike's Place Lux Suites by Barsala 2nd Ave and...   \n",
       "831  Pike's Place Lux Suites by Barsala 2rd Ave and...   \n",
       "206  Holiday Inn Express & Suites Seattle-City Cent...   \n",
       "256  Holiday Inn Express & Suites Seattle City Cent...   \n",
       "181  Travelodge Seattle by The Space Needle 200 6th...   \n",
       "211  Travelodge Seattle by The Space Needle 200 6th...   \n",
       "791  citizenM Seattle South Lake Union hotel 201 We...   \n",
       "836  citizenM Seattle South Lake Union hotel 201 We...   \n",
       "586  Quality Inn & Suites Seattle Center 618 John S...   \n",
       "551  Quality Inn & Suites Seattle Center 618 John S...   \n",
       "46   Hilton Garden Inn Seattle Downtown 1821 Boren ...   \n",
       "1    Hilton Garden Inn Seattle Downtown 1821 Boren ...   \n",
       "781  Hyatt Regency Lake Washington At SeattleS Sout...   \n",
       "746  Hyatt Regency Lake Washington At SeattleS Sout...   \n",
       "346  Home2 Suites by Hilton Seattle Airport 380 Upl...   \n",
       "341  Home2 Suites by Hilton Seattle Airport 380 Upl...   \n",
       "561  Renaissance Seattle Hotel 515 Madison Street, ...   \n",
       "546  Renaissance Seattle Hotel 515 Madison St, Seat...   \n",
       "391  Days Inn by Wyndham Seattle North of Downtown ...   \n",
       "396  Travelodge by Wyndham Seattle North of Downtow...   \n",
       "401  Comfort Inn & Suites 13700 Aurora Ave N, Seatt...   \n",
       "441  Comfort Inn & Suites 13700 Aurora Avenue North...   \n",
       "451  Roy Street Commons 621 12th Avenue East, Seatt...   \n",
       "411  Roy Street Commons 621 12th Ave E, Seattle, WA...   \n",
       "11   Crowne Plaza Seattle Downtown 1113 6th Ave, Se...   \n",
       "61   Crowne Plaza Seattle 1113 6th Ave, Seattle, Wa...   \n",
       "671  Watertown Hotel - A Staypineapple Hotel 4242 R...   \n",
       "706  University Inn - A Staypineapple Hotel 4140 Ro...   \n",
       "621  Executive Inn By The Space Needle 200 Taylor A...   \n",
       "182  Travelodge Seattle by The Space Needle 200 6th...   \n",
       "\n",
       "                                            right_side  similarity  \n",
       "826  Pike's Place Lux Suites by Barsala 2rd Ave and...    0.715406  \n",
       "831  Pike's Place Lux Suites by Barsala 2nd Ave and...    0.715406  \n",
       "206  Holiday Inn Express & Suites Seattle City Cent...    0.712321  \n",
       "256  Holiday Inn Express & Suites Seattle-City Cent...    0.712321  \n",
       "181  Travelodge Seattle by The Space Needle 200 6th...    0.669974  \n",
       "211  Travelodge Seattle by The Space Needle 200 6th...    0.669974  \n",
       "791  citizenM Seattle South Lake Union hotel 201 We...    0.651961  \n",
       "836  citizenM Seattle South Lake Union hotel 201 We...    0.651961  \n",
       "586  Quality Inn & Suites Seattle Center 618 John S...    0.627400  \n",
       "551  Quality Inn & Suites Seattle Center 618 John S...    0.627400  \n",
       "46   Hilton Garden Inn Seattle Downtown 1821 Boren ...    0.617412  \n",
       "1    Hilton Garden Inn Seattle Downtown 1821 Boren ...    0.617412  \n",
       "781  Hyatt Regency Lake Washington At SeattleS Sout...    0.614371  \n",
       "746  Hyatt Regency Lake Washington At SeattleS Sout...    0.614371  \n",
       "346  Home2 Suites by Hilton Seattle Airport 380 Upl...    0.582787  \n",
       "341  Home2 Suites by Hilton Seattle Airport 380 Upl...    0.582787  \n",
       "561  Renaissance Seattle Hotel 515 Madison St, Seat...    0.531174  \n",
       "546  Renaissance Seattle Hotel 515 Madison Street, ...    0.531174  \n",
       "391  Travelodge by Wyndham Seattle North of Downtow...    0.528202  \n",
       "396  Days Inn by Wyndham Seattle North of Downtown ...    0.528202  \n",
       "401  Comfort Inn & Suites 13700 Aurora Avenue North...    0.527696  \n",
       "441  Comfort Inn & Suites 13700 Aurora Ave N, Seatt...    0.527696  \n",
       "451  Roy Street Commons 621 12th Ave E, Seattle, WA...    0.499497  \n",
       "411  Roy Street Commons 621 12th Avenue East, Seatt...    0.499497  \n",
       "11   Crowne Plaza Seattle 1113 6th Ave, Seattle, Wa...    0.469867  \n",
       "61   Crowne Plaza Seattle Downtown 1113 6th Ave, Se...    0.469867  \n",
       "671  University Inn - A Staypineapple Hotel 4140 Ro...    0.432854  \n",
       "706  Watertown Hotel - A Staypineapple Hotel 4242 R...    0.432854  \n",
       "621  Travelodge Seattle by The Space Needle 200 6th...    0.428319  \n",
       "182  Executive Inn By The Space Needle 200 Taylor A...    0.428319  "
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "matches_df[matches_df['similarity'] < 0.99999].sort_values(by=['similarity'], ascending=False).head(30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "152"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "matches_df[matches_df['similarity'] < 0.50].right_side.nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
